{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ccdc import io\n",
    "csd_reader = io.EntryReader('CSD')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1161919"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(csd_reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "entry = csd_reader.entry('ABAVIJ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"ABAVIJ.cif\", \"w\") as f:\n",
    "    f.write(entry.to_string(\"cif\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "84898"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open(\"/mnt/data1/csd/CCDC/CSD_2022/csd/subsets/CSD_MOF_subsets/Non-disordered_MOF_subset.gcd\", 'r') as f:\n",
    "    MOFS = f.readlines()\n",
    "\n",
    "len(MOFS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "84898"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MOFS_ENTRY = [csd_reader.entry(x) for x in MOFS]\n",
    "len(MOFS_ENTRY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10.1021/cg801208z\n"
     ]
    }
   ],
   "source": [
    "x = MOFS_ENTRY[1000]\n",
    "print(x.publication.doi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'entry': {'analogue': None, 'attributes': {}, 'bioactivity': None, 'calculated_density': '1.641', 'ccdc_number': '234593', 'chemical_name': 'catena-(tetrakis(μ3-isonicotinato)-di-cobalt) ethanol clathrate', 'chemical_name_as_html': 'catena-(tetrakis(μ<sub>3</sub>-isonicotinato)-di-cobalt) ethanol clathrate', 'color': 'pink', 'cross_references': '()', 'database_name': 'as543be_ASER', 'deposition_date': '2004-11-10', 'disorder_details': None, 'doi': '', 'formula': '(C24 H16 Co2 N4 O8)n,n(C2 H6 O1)', 'habit': 'plate', 'has_3d_structure': True, 'has_disorder': False, 'heat_capacity': \"(-999.9, '')\", 'heat_capacity_notes': '', 'heat_of_fusion': \"(0.0, 0.0, '')\", 'heat_of_fusion_notes': '', 'identifier': 'ABAVIJ', 'input_melting_point_range': '(0.0, 0.0, None)', 'input_melting_point_text': None, 'is_organic': False, 'is_organometallic': True, 'is_polymeric': True, 'is_powder_study': False, 'melting_point': None, 'melting_point_default_units': 'K', 'peptide_sequence': None, 'phase_transition': None, 'polymorph': None, 'pressure': None, 'previous_identifier': 'K0500601', 'publication': \"Citation(authors='Qiang Wei, M.Nieuwenhuyzen, F.Meunier, C.Hardacre, S.L.James', journal='Journal(Dalton Transactions)', volume='', year=2004, first_page='1807', doi='10.1039/b404485a')\", 'publications': \"(Citation(authors='Qiang Wei, M.Nieuwenhuyzen, F.Meunier, C.Hardacre, S.L.James', journal='Journal(Dalton Transactions)', volume='', year=2004, first_page='1807', doi='10.1039/b404485a'),)\", 'r_factor': '4.25', 'radiation_source': 'X-ray', 'remarks': None, 'solvent': None, 'source': None, 'synonyms': '()', 'synonyms_as_html': '()', 'temperature': 'at 153 K'}, 'crystal': {'calculated_density': '1.6407884882007202', 'cell_angles': 'CellAngles(alpha=90.0, beta=105.267, gamma=90.0)', 'cell_lengths': 'CellLengths(a=9.8064, b=26.165, c=10.6685)', 'cell_volume': '2640.76735205985', 'crystal_system': 'monoclinic', 'formula': 'C26 H22 Co2 N4 O9', 'has_disorder': False, 'identifier': 'ABAVIJ', 'is_centrosymmetric': False, 'is_sohncke': False, 'lattice_centring': 'C-centred', 'packing_coefficient': '0.7001357082273113', 'spacegroup_number_and_setting': '(9, 1)', 'spacegroup_symbol': 'Cc', 'symmetry_operators': \"('x,y,z', 'x,-y,1/2+z', '1/2+x,1/2+y,z', '1/2+x,1/2-y,1/2+z')\", 'z_prime': '1.0', 'z_value': '4.0'}, 'molecule': {'all_atoms_have_sites': True, 'atoms': '[Atom(Co1), Atom(Co2), Atom(N1), Atom(C1), Atom(H1), Atom(C2), Atom(H2), Atom(C3), Atom(C4), Atom(H3), Atom(C5), Atom(H4), Atom(C6), Atom(O1), Atom(O2), Atom(N2), Atom(C7), Atom(H5), Atom(C8), Atom(H6), Atom(C9), Atom(C10), Atom(H7), Atom(C11), Atom(H8), Atom(C12), Atom(O3), Atom(O4), Atom(N3), Atom(C13), Atom(H9), Atom(C14), Atom(H10), Atom(C15), Atom(C16), Atom(H11), Atom(C17), Atom(H12), Atom(C18), Atom(O5), Atom(O6), Atom(N4), Atom(C19), Atom(H13), Atom(C20), Atom(H14), Atom(C21), Atom(C22), Atom(H15), Atom(C23), Atom(H16), Atom(C24), Atom(O7), Atom(O8), Atom(O2A), Atom(O6C), Atom(N2B), Atom(O4A), Atom(O5B), Atom(N4), Atom(O7A), Atom(Co1A), Atom(Co2B), Atom(Co2A), Atom(Co2B), Atom(Co1C), Atom(Co2), Atom(O9), Atom(H17), Atom(C25), Atom(H18), Atom(H19), Atom(C26), Atom(H20), Atom(H21), Atom(H22)]', 'bonds': '[Bond(Single Atom(Co1) Atom(N1)), Bond(Single Atom(Co2) Atom(O1)), Bond(Aromatic Atom(N1) Atom(C1)), Bond(Single Atom(C1) Atom(H1)), Bond(Aromatic Atom(C2) Atom(C1)), Bond(Single Atom(H2) Atom(C2)), Bond(Aromatic Atom(C3) Atom(C2)), Bond(Aromatic Atom(C4) Atom(C3)), Bond(Single Atom(H3) Atom(C4)), Bond(Aromatic Atom(C5) Atom(N1)), Bond(Single Atom(H4) Atom(C5)), Bond(Single Atom(C6) Atom(C3)), Bond(Delocalised Atom(O1) Atom(C6)), Bond(Delocalised Atom(O2) Atom(C6)), Bond(Aromatic Atom(N2) Atom(C7)), Bond(Single Atom(C7) Atom(H5)), Bond(Aromatic Atom(C8) Atom(C7)), Bond(Single Atom(H6) Atom(C8)), Bond(Aromatic Atom(C9) Atom(C8)), Bond(Aromatic Atom(C10) Atom(C9)), Bond(Single Atom(H7) Atom(C10)), Bond(Aromatic Atom(C11) Atom(N2)), Bond(Single Atom(H8) Atom(C11)), Bond(Single Atom(C12) Atom(C9)), Bond(Single Atom(O3) Atom(Co1)), Bond(Delocalised Atom(O4) Atom(C12)), Bond(Single Atom(N3) Atom(Co1)), Bond(Aromatic Atom(C13) Atom(N3)), Bond(Single Atom(H9) Atom(C13)), Bond(Aromatic Atom(C14) Atom(C13)), Bond(Single Atom(H10) Atom(C14)), Bond(Aromatic Atom(C15) Atom(C14)), Bond(Aromatic Atom(C16) Atom(C15)), Bond(Single Atom(H11) Atom(C16)), Bond(Aromatic Atom(C17) Atom(N3)), Bond(Single Atom(H12) Atom(C17)), Bond(Single Atom(C18) Atom(C15)), Bond(Delocalised Atom(O5) Atom(C18)), Bond(Delocalised Atom(O6) Atom(C18)), Bond(Aromatic Atom(N4) Atom(C19)), Bond(Single Atom(C19) Atom(H13)), Bond(Aromatic Atom(C20) Atom(C19)), Bond(Single Atom(H14) Atom(C20)), Bond(Aromatic Atom(C21) Atom(C20)), Bond(Aromatic Atom(C22) Atom(C21)), Bond(Single Atom(H15) Atom(C22)), Bond(Aromatic Atom(C23) Atom(N4)), Bond(Single Atom(H16) Atom(C23)), Bond(Single Atom(C24) Atom(C21)), Bond(Delocalised Atom(O7) Atom(C24)), Bond(Single Atom(O8) Atom(Co1)), Bond(Single Atom(O9) Atom(H17)), Bond(Single Atom(C25) Atom(O9)), Bond(Single Atom(H18) Atom(C25)), Bond(Single Atom(H19) Atom(C25)), Bond(Single Atom(C26) Atom(C25)), Bond(Single Atom(H20) Atom(C26)), Bond(Single Atom(H21) Atom(C26)), Bond(Single Atom(H22) Atom(C26)), Bond(Unknown Atom(O2A) Atom(Co1)), Bond(Unknown Atom(O6C) Atom(Co1)), Bond(Unknown Atom(N2B) Atom(Co2)), Bond(Unknown Atom(O4A) Atom(Co2)), Bond(Unknown Atom(O5B) Atom(Co2)), Bond(Unknown Atom(N4) Atom(Co2)), Bond(Unknown Atom(O7A) Atom(Co2)), Bond(Unknown Atom(Co1A) Atom(O2)), Bond(Unknown Atom(Co2B) Atom(N2)), Bond(Unknown Atom(Co2A) Atom(O4)), Bond(Unknown Atom(Co2B) Atom(O5)), Bond(Unknown Atom(Co1C) Atom(O6)), Bond(Unknown Atom(Co2) Atom(N4)), Bond(Aromatic Atom(C4) Atom(C5)), Bond(Aromatic Atom(C10) Atom(C11)), Bond(Delocalised Atom(C12) Atom(O3)), Bond(Aromatic Atom(C16) Atom(C17)), Bond(Aromatic Atom(C22) Atom(C23)), Bond(Delocalised Atom(C24) Atom(O8)), Bond(Unknown Atom(O7) Atom(Co2A))]', 'contains_unknown_bonds': True, 'formal_charge': '0', 'formula': '(C26 H22 Co2 N4 O9)n', 'heavy_atoms': '[Atom(Co1), Atom(Co2), Atom(N1), Atom(C1), Atom(C2), Atom(C3), Atom(C4), Atom(C5), Atom(C6), Atom(O1), Atom(O2), Atom(N2), Atom(C7), Atom(C8), Atom(C9), Atom(C10), Atom(C11), Atom(C12), Atom(O3), Atom(O4), Atom(N3), Atom(C13), Atom(C14), Atom(C15), Atom(C16), Atom(C17), Atom(C18), Atom(O5), Atom(O6), Atom(N4), Atom(C19), Atom(C20), Atom(C21), Atom(C22), Atom(C23), Atom(C24), Atom(O7), Atom(O8), Atom(O2A), Atom(O6C), Atom(N2B), Atom(O4A), Atom(O5B), Atom(N4), Atom(O7A), Atom(Co1A), Atom(Co2B), Atom(Co2A), Atom(Co2B), Atom(Co1C), Atom(Co2), Atom(O9), Atom(C25), Atom(C26)]', 'identifier': 'ABAVIJ', 'is_3d': True, 'is_organic': False, 'is_organometallic': True, 'is_polymeric': True, 'largest_ring_size': '8', 'molecular_volume': '737.1708545947535', 'molecular_weight': '1113.9486000000004', 'rings': '[Atom(Co2A)-Atom(O4)-Atom(C12)-Atom(O3)-Atom(Co1)-Atom(O8)-Atom(C24)-Atom(O7), Atom(C5)-Atom(N1)-Atom(C1)-Atom(C2)-Atom(C3)-Atom(C4), Atom(C11)-Atom(N2)-Atom(C7)-Atom(C8)-Atom(C9)-Atom(C10), Atom(C17)-Atom(C16)-Atom(C15)-Atom(C14)-Atom(C13)-Atom(N3), Atom(C23)-Atom(N4)-Atom(C19)-Atom(C20)-Atom(C21)-Atom(C22)]', 'smallest_ring_size': '6', 'smiles': None}}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "with open(\"dic.json\") as f:\n",
    "    dic = json.load(f)\n",
    "print(dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ABACUF\n",
      "\n",
      "{'analogue': None, 'attributes': {}, 'bioactivity': None, 'calculated_density': 2.823812595841845, 'ccdc_number': 1100034, 'chemical_name': \"catena(Tetra-aqua-tetrakis(μ2-formato-O,O')-bis(formato-O)-di-barium-copper)\", 'chemical_name_as_html': \"catena(Tetra-aqua-tetrakis(μ<sub>2</sub>-formato-O,O')-bis(formato-O)-di-barium-copper)\", 'color': None, 'cross_references': (), 'database_name': 'as543be_ASER', 'deposition_date': datetime.date(1971, 12, 31), 'disorder_details': None, 'doi': None, 'formula': '(C6 H14 Ba2 Cu1 O16)n', 'habit': None, 'has_3d_structure': True, 'has_disorder': False, 'heat_capacity': (-999.9, ''), 'heat_capacity_notes': '', 'heat_of_fusion': (0.0, 0.0, ''), 'heat_of_fusion_notes': '', 'identifier': 'ABACUF', 'input_melting_point_range': (0.0, 0.0, None), 'input_melting_point_text': None, 'is_organic': False, 'is_organometallic': True, 'is_polymeric': True, 'is_powder_study': False, 'melting_point': None, 'melting_point_default_units': 'K', 'peptide_sequence': None, 'phase_transition': None, 'polymorph': None, 'pressure': None, 'previous_identifier': 'ABACUF', 'publication': Citation(authors='R.V.G.S.Rao, K.Sundarama, G.S.Rao', journal='Journal(Zeitschrift fur Kristallographie, Kristallgeomie, Krystallphysik, Kristallchemie [1921-1977])', volume='110', year=1958, first_page='231', doi=None), 'publications': (Citation(authors='R.V.G.S.Rao, K.Sundarama, G.S.Rao', journal='Journal(Zeitschrift fur Kristallographie, Kristallgeomie, Krystallphysik, Kristallchemie [1921-1977])', volume='110', year=1958, first_page='231', doi=None),), 'r_factor': 13.8, 'radiation_source': 'X-ray', 'remarks': 'Re-refinement of this data is discussed in ABACUF01', 'solvent': None, 'source': None, 'synonyms': (), 'synonyms_as_html': (), 'temperature': None, 'cell_angles': CellAngles(alpha=99.05, beta=109.35000000000001, gamma=82.33000000000001), 'cell_lengths': CellLengths(a=8.75, b=7.16, c=6.88), 'cell_volume': 400.08772663640286, 'crystal_system': 'triclinic', 'is_centrosymmetric': True, 'is_sohncke': False, 'lattice_centring': 'primitive', 'packing_coefficient': 0.7205627705627705, 'spacegroup_number_and_setting': (2, 1), 'spacegroup_symbol': 'P-1', 'symmetry_operators': ('x,y,z', '-x,-y,-z'), 'z_prime': 0.5, 'z_value': 1.0, 'all_atoms_have_sites': False, 'contains_unknown_bonds': True, 'formal_charge': 0, 'is_3d': True, 'largest_ring_size': 4, 'molecular_volume': None, 'molecular_weight': 987.0179999999992, 'rings': [Atom(O3)-Atom(Cu1)-Atom(O7A)-Atom(Ba1), Atom(O3A)-Atom(Ba1A)-Atom(O7)-Atom(Cu1), Atom(O2)-Atom(Ba1)-Atom(O1)-Atom(C1), Atom(O2A)-Atom(Ba1A)-Atom(O1A)-Atom(C1A)], 'smallest_ring_size': 4, 'smiles': None}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n",
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n",
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n",
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n",
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n",
      "Exception ignored in: <built-in function delete_SolubilityInfo>\n",
      "RuntimeError: The Solubility Platform is not available.\n",
      "To enable this functionality contact admin@ccdc.cam.ac.uk\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for x in MOFS:\n",
    "    print(x)\n",
    "    data = {}\n",
    "    for t in dic:\n",
    "        target = getattr(csd_reader, t)(x)\n",
    "        for k in dic[t]:\n",
    "            if k in ['heavy_atoms', 'atoms', 'bonds']:  # 二次过滤\n",
    "                continue\n",
    "            data[k] = getattr(target, k)\n",
    "    data[\"doi\"] = data[\"publication\"].doi\n",
    "    print(data)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas import DataFrame\n",
    "\n",
    "pd = DataFrame(columns=data.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(pd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sqlalchemy import create_engine\n",
    "from sqlalchemy.types import NVARCHAR, Float, Integer\n",
    "engine = create_engine('mysql+mysqlconnector://root:digitalmofs@actvis.cn:9049/mofsdb')\n",
    "con = engine.connect()\n",
    "\n",
    "dtypedict = {\n",
    "  'str': NVARCHAR(length=255),\n",
    "  'int': Integer(),\n",
    "  'float': Float()\n",
    "}\n",
    "\n",
    "pd.to_sql(name='test', con=con, if_exists='append', index=False)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "7ed5c1bb94f52fd010b3d3ccf5daae836e7d243cc9c2685aabc4fdf5d9323919"
  },
  "kernelspec": {
   "display_name": "Python 3.7.11 64-bit ('csd': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
